/*
 * Copyright (C) 2019-2020 Yaong <yaongtime@gmail.com>
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include "vc4_private.h"
#include "mesa-sha1.h"
#include "compiler/nir/nir_builder.h"
#include "nir/nir_serialize.h"
#include "vk_format.h"

extern uint32_t vc4_debug;

/* Options handed to spirv_to_nir(): no optional SPIR-V capabilities are
 * enabled; UBO/SSBO access is lowered to (index, offset) pairs, physical
 * SSBO pointers to 64-bit global addresses, push constants stay logical
 * and shared memory uses plain 32-bit offsets.
 */
static const struct spirv_to_nir_options default_spirv_options =  {
   .caps = { false },
   .ubo_addr_format = nir_address_format_32bit_index_offset,
   .ssbo_addr_format = nir_address_format_32bit_index_offset,
   .phys_ssbo_addr_format = nir_address_format_64bit_global,
   .push_const_addr_format = nir_address_format_logical,
   .shared_addr_format = nir_address_format_32bit_offset,
   .frag_coord_is_sysval = false,
};

/* NIR compiler options for the VC4 backend: request lowering of every ALU
 * operation the hardware lacks (ffma, fdiv, fpow, ldexp, rotate, ...) and
 * force scalar, all-IO-through-temporaries shaders.
 */
static const nir_shader_compiler_options vc4_nir_options = {
    .lower_all_io_to_temps = true,
    .lower_extract_byte = true,
    .lower_extract_word = true,
    .lower_fdiv = true,
    .lower_ffma16 = true,
    .lower_ffma32 = true,
    .lower_ffma64 = true,
    .lower_flrp32 = true,
    .lower_fmod = true,
    .lower_fpow = true,
    .lower_fsat = true,
    .lower_fsqrt = true,
    .lower_ldexp = true,
    .lower_negate = true,
    .lower_rotate = true,
    .lower_to_scalar = true,
    .lower_umax = true,
    .lower_umin = true,
    .lower_isign = true,
    .max_unroll_iterations = 32,
};

/* Baseline dynamic state a pipeline starts from before the static
 * pipeline state is copied over it in pipeline_init_dynamic_state():
 * empty viewport/scissor arrays, fully-enabled stencil masks, zero
 * stencil reference/blend constants/depth bias, line width 1.0.
 */
static const struct vc4_dynamic_state default_dynamic_state = {
   .viewport = {
      .count = 0,
   },
   .scissor = {
      .count = 0,
   },
   .stencil_compare_mask =
   {
     .front = ~0u,
     .back = ~0u,
   },
   .stencil_write_mask =
   {
     .front = ~0u,
     .back = ~0u,
   },
   .stencil_reference =
   {
     .front = 0u,
     .back = 0u,
   },
   .blend_constants = { 0.0f, 0.0f, 0.0f, 0.0f },
   .depth_bias = {
      .constant_factor = 0.0f,
      .slope_factor = 0.0f,
   },
   .line_width = 1.0f,
};

/*
 * vkCreateShaderModule implementation: copies the caller's SPIR-V words
 * into a single allocation (struct header followed inline by the code)
 * and records a SHA1 of the code for later cache-key use.
 */
VkResult
vc4_CreateShaderModule(VkDevice _device,
                        const VkShaderModuleCreateInfo *pCreateInfo,
                        const VkAllocationCallbacks *pAllocator,
                        VkShaderModule *pShaderModule)
{
   VC4_FROM_HANDLE(vc4_device, device, _device);

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO);
   assert(pCreateInfo->flags == 0);

   /* The SPIR-V payload lives right after the module struct. */
   struct vc4_shader_module *module =
      vk_alloc2(&device->vk.alloc, pAllocator,
                sizeof(*module) + pCreateInfo->codeSize, 8,
                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!module)
      return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);

   /* Only driver-internal modules carry NIR directly; API modules don't. */
   module->nir = NULL;
   module->size = pCreateInfo->codeSize;
   memcpy(module->data, pCreateInfo->pCode, pCreateInfo->codeSize);

   /* Hash the SPIR-V so pipelines can key caches off the module contents. */
   _mesa_sha1_compute(module->data, module->size, module->sha1);

   *pShaderModule = vc4_shader_module_to_handle(module);
   return VK_SUCCESS;
}

/*
 * vkDestroyShaderModule implementation: releases the module allocation.
 * NULL handles are a no-op per the Vulkan spec.
 */
void
vc4_DestroyShaderModule(VkDevice _device,
                         VkShaderModule _module,
                         const VkAllocationCallbacks *pAllocator)
{
   VC4_FROM_HANDLE(vc4_device, device, _device);
   VC4_FROM_HANDLE(vc4_shader_module, module, _module);

   if (module == NULL)
      return;

   /* Driver-internal NIR modules are not heap-allocated through this path,
    * so the NIR pointer must never still be set here; the driver frees that
    * NIR itself once it is no longer needed.
    */
   assert(module->nir == NULL);

   vk_free2(&device->vk.alloc, pAllocator, module);
}

/* nir_lower_io size callback: IO size is measured in attribute slots.
 * The bindless flag is irrelevant for this driver and ignored.
 */
static int
type_size(const struct glsl_type *type, bool bindless)
{
   (void)bindless;
   return glsl_count_attribute_slots(type, false);
}

/*
 * Returns the slot in @map for the (set, binding, array_index) triplet,
 * appending a new entry when it is not present yet.
 *
 * On a hit the recorded array_size must match the caller's.
 * NOTE(review): is_shadow is not part of the lookup key, so a hit keeps the
 * previously recorded shadow state — confirm a given texture/sampler binding
 * is never registered with differing is_shadow values.
 */
static unsigned
descriptor_map_add(struct vc4_descriptor_map *map,
                   int set,
                   int binding,
                   int array_index,
                   int array_size,
                   bool is_shadow)
{
   assert(array_index < array_size);

   /* Linear search is fine: descriptor maps are small. */
   for (unsigned i = 0; i < map->num_desc; i++) {
      if (set == map->set[i] &&
          binding == map->binding[i] &&
          array_index == map->array_index[i]) {
         assert(array_size == map->array_size[i]);
         return i;
      }
   }

   /* Not found: append a new entry at the end. */
   unsigned index = map->num_desc;
   map->set[index] = set;
   map->binding[index] = binding;
   map->array_index[index] = array_index;
   map->array_size[index] = array_size;
   map->is_shadow[index] = is_shadow;
   map->num_desc++;

   return index;
}

/*
 * Replaces a texture/sampler deref source on @instr with a flat index into
 * the pipeline's texture or sampler descriptor map: the deref source is
 * removed and texture_index/sampler_index is set to the map slot.
 * Only direct (non-array-deref) variable derefs are supported here.
 * The builder @b is currently unused.
 */
static void lower_tex_src_to_offset(nir_builder *b,
                                    nir_tex_instr *instr, unsigned src_idx,
                                    struct vc4_pipeline *pipeline,
                                    const struct vc4_pipeline_layout *layout)
{
   nir_tex_src *src = &instr->src[src_idx];
   bool is_sampler = src->src_type == nir_tex_src_sampler_deref;
   unsigned base_index = 0;

   nir_deref_instr *deref = nir_instr_as_deref(src->src.ssa->parent_instr);

   /* Only plain variable derefs are handled (no array indexing chains). */
   assert(deref->deref_type == nir_deref_type_var);

   /* Drop the deref source; the index fields below replace it. */
   nir_tex_instr_remove_src(instr, src_idx);

   uint32_t set = deref->var->data.descriptor_set;
   uint32_t binding = deref->var->data.binding;
   struct vc4_descriptor_set_layout *set_layout = layout->set[set].layout;
   struct vc4_descriptor_set_binding_layout *binding_layout = &set_layout->binding[binding];

   /* For input attachments, the shader includes the attachment_idx. As we are
    * treating them as a texture, we only want the base_index
    */
   uint32_t array_index = binding_layout->type != VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT ? deref->var->data.index + base_index : base_index;

   /* Register (or look up) the binding in the per-pipeline descriptor map. */
   int desc_index = descriptor_map_add(is_sampler ? &pipeline->sampler_map : &pipeline->texture_map,
                                       set,
                                       binding,
                                       array_index,
                                       binding_layout->array_size,
                                       instr->is_shadow);

   if (is_sampler)
      instr->sampler_index = desc_index;
   else
      instr->texture_index = desc_index;
}

/*
 * Lowers a vulkan_resource_index intrinsic to an immediate integer index
 * into the pipeline's UBO or SSBO descriptor map, then removes the
 * intrinsic. Only UBO/SSBO descriptor types are supported, and the array
 * index must be a compile-time constant.
 */
static void
lower_vulkan_resource_index(nir_builder *b,
                            nir_intrinsic_instr *instr,
                            struct vc4_pipeline *pipeline,
                            const struct vc4_pipeline_layout *layout)
{
   assert(instr->intrinsic == nir_intrinsic_vulkan_resource_index);

   /* src[0] is the array index within the binding. */
   nir_const_value *const_val = nir_src_as_const_value(instr->src[0]);

   unsigned set = nir_intrinsic_desc_set(instr);
   unsigned binding = nir_intrinsic_binding(instr);

   struct vc4_descriptor_set_layout *set_layout = layout->set[set].layout;
   struct vc4_descriptor_set_binding_layout *binding_layout =
       &set_layout->binding[binding];
   unsigned index = 0;

   switch (nir_intrinsic_desc_type(instr)) {
   case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
   case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: {
      struct vc4_descriptor_map *descriptor_map =
         nir_intrinsic_desc_type(instr) == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER ?
         &pipeline->ubo_map : &pipeline->ssbo_map;

      /* Dynamic indexing into descriptor arrays is not supported. */
      if (!const_val)
         unreachable("non-constant vulkan_resource_index array index");

      index = descriptor_map_add(descriptor_map, set, binding,
                                 const_val->u32,
                                 binding_layout->array_size,
                                 false /* is_shadow: Doesn't really matter in this case */);

      if (nir_intrinsic_desc_type(instr) == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER) {
         /* skip index 0 which is used for push constants */
         index++;
      }
      break;
   }

   default:
      unreachable("unsupported desc_type for vulkan_resource_index");
      break;
   }

   /* Replace all uses of the intrinsic's result with the immediate index. */
   b->cursor = nir_before_instr(&instr->instr);

   nir_ssa_def *ssa_index = nir_imm_int(b, index);
   nir_ssa_def_rewrite_uses(&instr->dest.ssa,
                            nir_src_for_ssa(ssa_index));
   nir_instr_remove(&instr->instr);
}

/*
 * Lowers descriptor-related intrinsics: vulkan_resource_index becomes an
 * immediate descriptor-map index, and load_vulkan_descriptor becomes a
 * simple no-op vec2 repack. All other intrinsics are left untouched.
 */
static void
vc4_nir_lower_io_instr(nir_builder *b,
                       struct nir_instr *instr, struct vc4_pipeline *pipeline)
{
   if (instr->type != nir_instr_type_intrinsic)
      return;
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

   switch (intr->intrinsic) {
   case nir_intrinsic_vulkan_resource_index:
      lower_vulkan_resource_index(b, intr, pipeline, pipeline->layout);
      break;
   case nir_intrinsic_load_vulkan_descriptor: {
      /* We are not using it, as loading the descriptor happens as part of the
       * load/store instruction, so the simpler is just doing a no-op. We just
       * lower the desc back to a vec2, as it is what load_ssbo/ubo expects.
       */

      /* Fix: the builder cursor was never positioned here, so the new vec2
       * was emitted at whatever stale location the cursor last pointed to.
       * Insert the replacement right before the intrinsic being removed.
       */
      b->cursor = nir_before_instr(&intr->instr);
      nir_ssa_def *desc = nir_vec2(b, intr->src[0].ssa, nir_imm_int(b, 0));
      nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(desc));
      nir_instr_remove(&intr->instr);
      break;
   }
   default:
      break;
   }
}

/*
 * Lowers the texture/sampler deref sources of a tex instruction to
 * descriptor-map indices. Returns true if the instruction was changed
 * (i.e. it had at least one texture or sampler deref source).
 */
static bool
vc4_nir_lower_tex(nir_builder *b,
                  nir_instr *instr,
                  struct vc4_pipeline *pipeline)
{
   nir_tex_instr *tex = nir_instr_as_tex(instr);
   const struct vc4_pipeline_layout *layout = pipeline->layout;

   int texture_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
   if (texture_idx >= 0)
      lower_tex_src_to_offset(b, tex, texture_idx, pipeline, layout);

   int sampler_idx = nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref);
   if (sampler_idx >= 0)
      lower_tex_src_to_offset(b, tex, sampler_idx, pipeline, layout);

   return texture_idx >= 0 || sampler_idx >= 0;
}

/*
 * Runs the descriptor-index and texture lowering over every instruction
 * in @impl. Always returns true.
 */
static bool
vc4_nir_lower_io_impl(nir_function_impl *impl, struct vc4_pipeline *pipeline)
{
   nir_builder b;
   nir_builder_init(&b, impl);

   /* _safe iteration: the lowering helpers may remove instructions. */
   nir_foreach_block(block, impl)
   {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type == nir_instr_type_intrinsic)
            vc4_nir_lower_io_instr(&b, instr, pipeline);
         else if (instr->type == nir_instr_type_tex)
            vc4_nir_lower_tex(&b, instr, pipeline);
      }
   }

   /* No control flow was added or removed, so block indices and the
    * dominance tree are still valid.
    */
   nir_metadata_preserve(impl, nir_metadata_block_index |
                               nir_metadata_dominance);

   return true;
}

/* Applies the descriptor/texture IO lowering to every function impl in @s. */
static void vc4_pipeline_nir_lower_io(nir_shader *s, struct vc4_pipeline *pipeline)
{
   nir_foreach_function(function, s)
   {
      if (!function->impl)
         continue;
      vc4_nir_lower_io_impl(function->impl, pipeline);
   }
}

/*
 * Runs the standard NIR optimization loop until no pass reports progress.
 * flrp lowering is only done on the first iteration: nothing later should
 * rematerialize flrps. (Reindented to the file's 3-space style; the pass
 * list and order are unchanged.)
 */
static void
vc4_optimize_nir(struct nir_shader *s)
{
   bool progress;
   /* Bit sizes for which the compiler options request flrp lowering. */
   unsigned lower_flrp =
      (s->options->lower_flrp16 ? 16 : 0) |
      (s->options->lower_flrp32 ? 32 : 0) |
      (s->options->lower_flrp64 ? 64 : 0);

   do {
      progress = false;

      NIR_PASS_V(s, nir_lower_vars_to_ssa);
      NIR_PASS_V(s, nir_opt_deref);
      NIR_PASS(progress, s, nir_lower_alu_to_scalar, NULL, NULL);
      NIR_PASS(progress, s, nir_lower_phis_to_scalar);
      NIR_PASS(progress, s, nir_copy_prop);
      NIR_PASS(progress, s, nir_opt_remove_phis);
      NIR_PASS(progress, s, nir_opt_dce);
      NIR_PASS(progress, s, nir_opt_dead_cf);
      NIR_PASS(progress, s, nir_opt_cse);
      NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);
      NIR_PASS(progress, s, nir_opt_algebraic);
      NIR_PASS(progress, s, nir_opt_constant_folding);
      if (lower_flrp != 0) {
         bool lower_flrp_progress = false;

         NIR_PASS(lower_flrp_progress, s, nir_lower_flrp,
                  lower_flrp,
                  false /* always_precise */);
         if (lower_flrp_progress) {
            NIR_PASS(progress, s, nir_opt_constant_folding);
            progress = true;
         }

         /* Nothing should rematerialize any flrps, so we only
          * need to do this lowering once.
          */
         lower_flrp = 0;
      }

      NIR_PASS(progress, s, nir_opt_undef);
      NIR_PASS(progress, s, nir_opt_loop_unroll,
               nir_var_shader_in |
               nir_var_shader_out |
               nir_var_function_temp);
   } while (progress);
}

/* FIXME: This is basically the same code at anv, tu and radv. Move to common
 * place?
 */
/* FIXME: This is basically the same code at anv, tu and radv. Move to common
 * place?
 */
/*
 * Converts a VkSpecializationInfo into the nir_spirv_specialization array
 * spirv_to_nir() expects. Returns NULL (with *out_num_spec_entries left
 * untouched) when there is nothing to specialize or allocation fails;
 * otherwise the caller owns the returned array and must free() it.
 */
static struct nir_spirv_specialization*
vk_spec_info_to_nir_spirv(const VkSpecializationInfo *spec_info,
                          uint32_t *out_num_spec_entries)
{
   if (spec_info == NULL || spec_info->mapEntryCount == 0)
      return NULL;

   uint32_t num_spec_entries = spec_info->mapEntryCount;
   struct nir_spirv_specialization *spec_entries =
      calloc(num_spec_entries, sizeof(*spec_entries));
   /* Fix: the calloc result was dereferenced unchecked. On OOM fall back
    * to "no specialization" rather than crashing.
    */
   if (spec_entries == NULL)
      return NULL;

   for (uint32_t i = 0; i < num_spec_entries; i++) {
      VkSpecializationMapEntry entry = spec_info->pMapEntries[i];
      /* char* arithmetic: pointer arithmetic on void* is a GNU extension. */
      const char *data = (const char *)spec_info->pData + entry.offset;
      assert(data + entry.size <=
             (const char *)spec_info->pData + spec_info->dataSize);

      spec_entries[i].id = spec_info->pMapEntries[i].constantID;
      switch (entry.size) {
      case 8:
         spec_entries[i].value.u64 = *(const uint64_t *)data;
         break;
      case 4:
         spec_entries[i].value.u32 = *(const uint32_t *)data;
         break;
      case 2:
         spec_entries[i].value.u16 = *(const uint16_t *)data;
         break;
      case 1:
         spec_entries[i].value.u8 = *(const uint8_t *)data;
         break;
      default:
         assert(!"Invalid spec constant size");
         break;
      }
   }

   *out_num_spec_entries = num_spec_entries;
   return spec_entries;
}


/*
 * Compiles a pipeline stage's shader module to optimized NIR: SPIR-V
 * modules go through spirv_to_nir(), driver-internal NIR modules are
 * cloned, then the result runs through inlining, IO/variable lowering
 * and the optimization loop. The caller owns the returned nir_shader.
 */
static nir_shader *
shader_module_compile_to_nir(struct vc4_device *device,
                             struct vc4_pipeline_stage *stage,
                             struct vc4_pipeline *pipeline)
{
   nir_shader *nir;
   const nir_shader_compiler_options *nir_options = &vc4_nir_options;

   if (!stage->module->nir) {
      /* SPIR-V path: module->data holds a word-aligned SPIR-V binary. */
      uint32_t *spirv = (uint32_t *) stage->module->data;
      assert(stage->module->size % 4 == 0);

      // if (V3D_DEBUG & V3D_DEBUG_DUMP_SPIRV)
         // vc4_print_spirv(stage->module->data, stage->module->size, stderr);

      uint32_t num_spec_entries = 0;
      struct nir_spirv_specialization *spec_entries =
         vk_spec_info_to_nir_spirv(stage->spec_info, &num_spec_entries);
      const struct spirv_to_nir_options spirv_options = default_spirv_options;
      nir = spirv_to_nir(spirv, stage->module->size / 4,
                         spec_entries, num_spec_entries,
                         stage->stage, stage->entrypoint,
                         &spirv_options, nir_options);
      nir_validate_shader(nir, "after spirv_to_nir");
      free(spec_entries);
   } else {
      /* For NIR modules created by the driver we can't consume the NIR
       * directly, we need to clone it first, since ownership of the NIR code
       * (as with SPIR-V code for SPIR-V shaders), belongs to the creator
       * of the module and modules can be destroyed immediately after been used
       * to create pipelines.
       */
      nir = nir_shader_clone(NULL, stage->module->nir);
      nir_validate_shader(nir, "nir module");
   }
   assert(nir->info.stage == stage->stage);

   // if (V3D_DEBUG & (V3D_DEBUG_NIR |
   //                  v3d_debug_flag_for_shader_stage(stage->stage))) {
   //    fprintf(stderr, "Initial form: %s prog %d NIR:\n",
   //            gl_shader_stage_name(stage->stage),
   //            stage->program_id);
   //    nir_print_shader(nir, stderr);
   //    fprintf(stderr, "\n");
   // }

   /* We have to lower away local variable initializers right before we
    * inline functions.  That way they get properly initialized at the top
    * of the function and not at the top of its caller.
    */
   NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
   NIR_PASS_V(nir, nir_lower_returns);
   NIR_PASS_V(nir, nir_inline_functions);
   NIR_PASS_V(nir, nir_opt_deref);
   NIR_PASS_V(nir, nir_copy_prop);

   /* After inlining, only the entrypoint is needed; drop the rest. */
   foreach_list_typed_safe(nir_function, func, node, &nir->functions) {
      if (!func->is_entrypoint)
         exec_node_remove(&func->node);
   }
   assert(exec_list_length(&nir->functions) == 1);
   /* Lower the remaining (non-function-temp) variable initializers. */
   NIR_PASS_V(nir, nir_lower_variable_initializers, ~nir_var_function_temp);

   NIR_PASS_V(nir, nir_split_var_copies);
   NIR_PASS_V(nir, nir_split_per_member_structs);

   NIR_PASS_V(nir, nir_remove_dead_variables,
            nir_var_shader_in | nir_var_shader_out | nir_var_system_value | nir_var_mem_shared,
            NULL);

   NIR_PASS_V(nir, nir_propagate_invariant);

   NIR_PASS_V(nir, nir_lower_io_to_temporaries, nir_shader_get_entrypoint(nir), true, true);

   NIR_PASS_V(nir, nir_lower_global_vars_to_local);
   NIR_PASS_V(nir, nir_split_var_copies);
   NIR_PASS_V(nir, nir_lower_var_copies);

   NIR_PASS_V(nir, nir_opt_copy_prop_vars);
   NIR_PASS_V(nir, nir_opt_combine_stores, nir_var_all);

   /* The backend cannot handle indirect IO addressing. */
   NIR_PASS_V(nir, nir_lower_indirect_derefs, nir_var_shader_in | nir_var_shader_out, UINT32_MAX);

   NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);

   /* Assign driver_location for all IO variables. */
   nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs, stage->stage);
   nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs, stage->stage);

   NIR_PASS_V(nir, nir_lower_system_values);
   NIR_PASS_V(nir, nir_lower_compute_system_values, NULL);

   NIR_PASS_V(nir, nir_lower_frexp);

   // NIR_PASS_V(nir, nir_lower_explicit_io,
   //          nir_var_mem_ubo | nir_var_mem_ssbo,
   //          nir_address_format_vec2_index_32bit_offset);

   /* Address formats here must match default_spirv_options above. */
   NIR_PASS_V(nir, nir_lower_explicit_io,
              nir_var_mem_push_const,
              nir_address_format_32bit_offset);

   NIR_PASS_V(nir, nir_lower_explicit_io,
              nir_var_mem_ubo | nir_var_mem_ssbo,
              nir_address_format_32bit_index_offset);

   if (nir->info.stage == MESA_SHADER_VERTEX)
      NIR_PASS_V(nir, nir_lower_point_size, 1.0f, 0.0f);

   NIR_PASS_V(nir, nir_lower_regs_to_ssa);
   NIR_PASS_V(nir, nir_normalize_cubemap_coords);

   NIR_PASS_V(nir, nir_lower_load_const_to_scalar);

   /* Vulkan uses the separate-shader linking model */
   nir->info.separate_shader = true;

   vc4_optimize_nir(nir);

   /* Map descriptor accesses to this pipeline's descriptor maps. */
   vc4_pipeline_nir_lower_io(nir, pipeline);

   NIR_PASS_V(nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out,
            type_size, (nir_lower_io_options)0);

   vc4_optimize_nir(nir);

   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);

   /* Garbage collect dead instructions */
   nir_sweep(nir);

   if (vc4_debug & VC4_DEBUG_NIR) {
         fprintf(stderr, "%s prog NIR:\n",
               gl_shader_stage_name(stage->stage));
         nir_print_shader(nir, stderr);
         fprintf(stderr, "\n");
   }

   return nir;
}

/*
 * Returns compiled NIR for a pipeline stage.
 *
 * TODO: NIR-level pipeline caching (lookup via p_stage->shader_sha1 in
 * @cache and in the device's default cache) is not wired up yet, so every
 * request goes straight to the compiler; @cache is currently unused.
 */
static nir_shader*
pipeline_stage_get_nir(struct vc4_pipeline_stage *p_stage,
                       struct vc4_pipeline *pipeline,
                       struct vc4_pipeline_cache *cache)
{
   nir_shader *nir =
      shader_module_compile_to_nir(pipeline->device, p_stage, pipeline);

   /* FIXME: a NULL result shouldn't happen, raise error? */
   return nir;
}

/*
 * First compilation pass over the graphics pipeline stages: allocates a
 * vc4_pipeline_stage per VkPipelineShaderStageCreateInfo, compiles its
 * NIR and records it as pipeline->vs or pipeline->fs. Only vertex and
 * fragment stages are supported.
 *
 * NOTE(review): stages allocated in earlier loop iterations are not freed
 * here when a later vk_zalloc2 fails — presumably pipeline destruction
 * releases pipeline->vs/fs; verify the error-unwind path. p_stage->nir may
 * also be NULL (see FIXME in pipeline_stage_get_nir) without an error being
 * raised.
 */
static VkResult
pipeline_compile_graphics(struct vc4_pipeline *pipeline,
                          struct vc4_pipeline_cache *cache,
                          const VkGraphicsPipelineCreateInfo *pCreateInfo,
                          const VkAllocationCallbacks *pAllocator)
{
   struct vc4_device *device = pipeline->device;
   struct vc4_physical_device *physical_device = device->physical_device;

   /* First pass to get the the common info from the shader and the nir
    * shader. We don't care of the coord shader for now.
    */
   for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
      const VkPipelineShaderStageCreateInfo *sinfo = &pCreateInfo->pStages[i];
      gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);

      struct vc4_pipeline_stage *p_stage =
         vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8,
                    VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);

      if (p_stage == NULL)
         return VK_ERROR_OUT_OF_HOST_MEMORY;

      /* Note that we are assigning program_id slightly differently that
       * v3d. Here we are assigning one per pipeline stage, so vs and vs_bin
       * would have a different program_id, while v3d would have the same for
       * both. For the case of v3dv, it is more natural to have an id this way,
       * as right now we are using it for debugging, not for shader-db.
       */
      p_stage->program_id =
         p_atomic_inc_return(&physical_device->next_program_id);
      p_stage->compiled_variant_count = 0;

      p_stage->pipeline = pipeline;
      p_stage->stage = stage;
      if (stage == MESA_SHADER_VERTEX)
         p_stage->is_coord = false;
      p_stage->entrypoint = sinfo->pName;
      p_stage->module = vc4_shader_module_from_handle(sinfo->module);
      p_stage->spec_info = sinfo->pSpecializationInfo;

      // pipeline_hash_shader(p_stage->module,
      //                      p_stage->entrypoint,
      //                      stage,
      //                      p_stage->spec_info,
      //                      p_stage->shader_sha1);

      pipeline->active_stages |= sinfo->stage;

      /* Compile (or eventually cache-fetch) the stage's NIR. */
      p_stage->nir = pipeline_stage_get_nir(p_stage, pipeline, cache);

      switch(stage) {
      case MESA_SHADER_VERTEX:
         pipeline->vs = p_stage;
         break;
      case MESA_SHADER_FRAGMENT:
         pipeline->fs = p_stage;
         break;
      default:
         unreachable("not supported shader stage");
      }
   }

   return VK_SUCCESS;
}

/*
 * Maps a VkDynamicState enum to the driver's internal VC4_DYNAMIC_* bit.
 * Unsupported-but-harmless states map to 0; anything else is a driver bug.
 */
static unsigned
vc4_dynamic_state_mask(VkDynamicState state)
{
   switch(state) {
   case VK_DYNAMIC_STATE_VIEWPORT:
      return VC4_DYNAMIC_VIEWPORT;
   case VK_DYNAMIC_STATE_SCISSOR:
      return VC4_DYNAMIC_SCISSOR;
   case VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK:
      return VC4_DYNAMIC_STENCIL_COMPARE_MASK;
   case VK_DYNAMIC_STATE_STENCIL_WRITE_MASK:
      return VC4_DYNAMIC_STENCIL_WRITE_MASK;
   case VK_DYNAMIC_STATE_STENCIL_REFERENCE:
      return VC4_DYNAMIC_STENCIL_REFERENCE;
   case VK_DYNAMIC_STATE_BLEND_CONSTANTS:
      return VC4_DYNAMIC_BLEND_CONSTANTS;
   case VK_DYNAMIC_STATE_DEPTH_BIAS:
      return VC4_DYNAMIC_DEPTH_BIAS;
   case VK_DYNAMIC_STATE_LINE_WIDTH:
      return VC4_DYNAMIC_LINE_WIDTH;

   /* Depth bounds testing is not available in V3D 4.2 so here we are just
    * ignoring this dynamic state. We are already asserting at pipeline creation
    * time that depth bounds testing is not enabled.
    * NOTE(review): the comment references V3D 4.2 (likely inherited from
    * v3dv) — confirm it applies to this VC4 driver as well.
    */
   case VK_DYNAMIC_STATE_DEPTH_BOUNDS:
      return 0;

   default:
      unreachable("Unhandled dynamic state");
   }
}

/*
 * Initializes pipeline->dynamic_state: starts from default_dynamic_state,
 * computes the mask of states declared dynamic by the app, and for every
 * state that is NOT dynamic copies the static value from the corresponding
 * pipeline create-info struct. Any of the state-info pointers may be NULL
 * (e.g. rasterization-discard or no depth/stencil attachment).
 */
static void
pipeline_init_dynamic_state(
   struct vc4_pipeline *pipeline,
   const VkPipelineDynamicStateCreateInfo *pDynamicState,
   const VkPipelineViewportStateCreateInfo *pViewportState,
   const VkPipelineDepthStencilStateCreateInfo *pDepthStencilState,
   const VkPipelineColorBlendStateCreateInfo *pColorBlendState,
   const VkPipelineRasterizationStateCreateInfo *pRasterizationState)
{
   pipeline->dynamic_state = default_dynamic_state;
   struct vc4_dynamic_state *dynamic = &pipeline->dynamic_state;

   /* Create a mask of enabled dynamic states */
   uint32_t dynamic_states = 0;
   if (pDynamicState) {
      uint32_t count = pDynamicState->dynamicStateCount;
      for (uint32_t s = 0; s < count; s++) {
         dynamic_states |=
            vc4_dynamic_state_mask(pDynamicState->pDynamicStates[s]);
      }
   }

   /* For any pipeline states that are not dynamic, set the dynamic state
    * from the static pipeline state.
    */
   if (pViewportState) {
      if (!(dynamic_states & VC4_DYNAMIC_VIEWPORT)) {
         dynamic->viewport.count = pViewportState->viewportCount;
         typed_memcpy(dynamic->viewport.viewports, pViewportState->pViewports,
                      pViewportState->viewportCount);

         /* Precompute the scale/translate transform for each viewport. */
         for (uint32_t i = 0; i < dynamic->viewport.count; i++) {
            vc4_viewport_compute_xform(&dynamic->viewport.viewports[i],
                                        dynamic->viewport.scale[i],
                                        dynamic->viewport.translate[i]);
         }
      }

      if (!(dynamic_states & VC4_DYNAMIC_SCISSOR)) {
         dynamic->scissor.count = pViewportState->scissorCount;
         typed_memcpy(dynamic->scissor.scissors, pViewportState->pScissors,
                      pViewportState->scissorCount);
      }
   }

   if (pDepthStencilState) {
      if (!(dynamic_states & VC4_DYNAMIC_STENCIL_COMPARE_MASK)) {
         dynamic->stencil_compare_mask.front =
            pDepthStencilState->front.compareMask;
         dynamic->stencil_compare_mask.back =
            pDepthStencilState->back.compareMask;
      }

      if (!(dynamic_states & VC4_DYNAMIC_STENCIL_WRITE_MASK)) {
         dynamic->stencil_write_mask.front = pDepthStencilState->front.writeMask;
         dynamic->stencil_write_mask.back = pDepthStencilState->back.writeMask;
      }

      if (!(dynamic_states & VC4_DYNAMIC_STENCIL_REFERENCE)) {
         dynamic->stencil_reference.front = pDepthStencilState->front.reference;
         dynamic->stencil_reference.back = pDepthStencilState->back.reference;
      }
   }

   if (pColorBlendState && !(dynamic_states & VC4_DYNAMIC_BLEND_CONSTANTS)) {
      memcpy(dynamic->blend_constants, pColorBlendState->blendConstants,
             sizeof(dynamic->blend_constants));
   }

   if (pRasterizationState) {
      /* Depth bias values are only meaningful when the feature is enabled. */
      if (pRasterizationState->depthBiasEnable &&
          !(dynamic_states & VC4_DYNAMIC_DEPTH_BIAS)) {
         dynamic->depth_bias.depthBiasEnable = pRasterizationState->depthBiasEnable;
         dynamic->depth_bias.constant_factor =
            pRasterizationState->depthBiasConstantFactor;
         dynamic->depth_bias.slope_factor =
            pRasterizationState->depthBiasSlopeFactor;
      }
      if (!(dynamic_states & VC4_DYNAMIC_LINE_WIDTH))
         dynamic->line_width = pRasterizationState->lineWidth;
   }

   pipeline->dynamic_state.mask = dynamic_states;
}

/*
 * qsort() comparator for pipe_vertex_element arrays: sorts entries with
 * src_format == PIPE_FORMAT_NONE (unused slots) after used slots.
 *
 * Fix: the original ignored in_b and never returned 0, so cmp(a,b) and
 * cmp(b,a) could both report "greater" — an inconsistent ordering, which
 * is undefined behavior for qsort. Compare both operands and return 0 for
 * equal-class elements.
 */
static int
pipe_vertex_compar(const void *in_a, const void *in_b)
{
   const struct pipe_vertex_element *a = in_a;
   const struct pipe_vertex_element *b = in_b;
   bool a_none = a->src_format == PIPE_FORMAT_NONE;
   bool b_none = b->src_format == PIPE_FORMAT_NONE;

   if (a_none == b_none)
      return 0;

   return a_none ? 1 : -1;
}

/* Converts a VkBlendFactor to the equivalent gallium PIPE_BLENDFACTOR_*.
 * Unknown values assert in debug builds and fall back to 0.
 */
static inline unsigned vk_conv_blend_factor(enum VkBlendFactor vk_factor)
{
   switch (vk_factor) {
   case VK_BLEND_FACTOR_ZERO:
      return PIPE_BLENDFACTOR_ZERO;
   case VK_BLEND_FACTOR_ONE:
      return PIPE_BLENDFACTOR_ONE;
   case VK_BLEND_FACTOR_SRC_COLOR:
      return PIPE_BLENDFACTOR_SRC_COLOR;
   case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
      return PIPE_BLENDFACTOR_INV_SRC_COLOR;
   case VK_BLEND_FACTOR_DST_COLOR:
      return PIPE_BLENDFACTOR_DST_COLOR;
   case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
      return PIPE_BLENDFACTOR_INV_DST_COLOR;
   case VK_BLEND_FACTOR_SRC_ALPHA:
      return PIPE_BLENDFACTOR_SRC_ALPHA;
   case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
      return PIPE_BLENDFACTOR_INV_SRC_ALPHA;
   case VK_BLEND_FACTOR_DST_ALPHA:
      return PIPE_BLENDFACTOR_DST_ALPHA;
   case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
      return PIPE_BLENDFACTOR_INV_DST_ALPHA;
   case VK_BLEND_FACTOR_CONSTANT_COLOR:
      return PIPE_BLENDFACTOR_CONST_COLOR;
   case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
      return PIPE_BLENDFACTOR_INV_CONST_COLOR;
   case VK_BLEND_FACTOR_CONSTANT_ALPHA:
      return PIPE_BLENDFACTOR_CONST_ALPHA;
   case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
      return PIPE_BLENDFACTOR_INV_CONST_ALPHA;
   case VK_BLEND_FACTOR_SRC1_COLOR:
      return PIPE_BLENDFACTOR_SRC1_COLOR;
   case VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR:
      return PIPE_BLENDFACTOR_INV_SRC1_COLOR;
   case VK_BLEND_FACTOR_SRC1_ALPHA:
      return PIPE_BLENDFACTOR_SRC1_ALPHA;
   case VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA:
      return PIPE_BLENDFACTOR_INV_SRC1_ALPHA;
   case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
      return PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE;
   default:
      assert(0);
      return 0;
   }
}

/* Converts a VkBlendOp to the equivalent gallium PIPE_BLEND_* function.
 * Unknown values assert in debug builds and fall back to 0.
 */
static inline unsigned vk_conv_blend_func(enum VkBlendOp op)
{
   switch (op) {
   case VK_BLEND_OP_ADD:
      return PIPE_BLEND_ADD;
   case VK_BLEND_OP_SUBTRACT:
      return PIPE_BLEND_SUBTRACT;
   case VK_BLEND_OP_REVERSE_SUBTRACT:
      return PIPE_BLEND_REVERSE_SUBTRACT;
   case VK_BLEND_OP_MIN:
      return PIPE_BLEND_MIN;
   case VK_BLEND_OP_MAX:
      return PIPE_BLEND_MAX;
   default:
      assert(0);
      return 0;
   }
}

/* Converts a VkLogicOp to the equivalent gallium PIPE_LOGICOP_*.
 * NOTE(review): unlike the blend factor/func converters above, the default
 * case here silently falls back to COPY instead of asserting — all 16 core
 * VkLogicOp values are covered, so the default should be unreachable;
 * consider making the behavior consistent.
 */
static inline unsigned vk_conv_blend_logicOp(VkLogicOp op)
{
   switch (op) {
   case VK_LOGIC_OP_CLEAR:
      return PIPE_LOGICOP_CLEAR;
   case VK_LOGIC_OP_AND:
      return PIPE_LOGICOP_AND;
   case VK_LOGIC_OP_AND_REVERSE:
      return PIPE_LOGICOP_AND_REVERSE;
   case VK_LOGIC_OP_COPY:
      return PIPE_LOGICOP_COPY;
   case VK_LOGIC_OP_AND_INVERTED:
      return PIPE_LOGICOP_AND_INVERTED;
   case VK_LOGIC_OP_NO_OP:
      return PIPE_LOGICOP_NOOP;
   case VK_LOGIC_OP_XOR:
      return PIPE_LOGICOP_XOR;
   case VK_LOGIC_OP_OR:
      return PIPE_LOGICOP_OR;
   case VK_LOGIC_OP_NOR:
      return PIPE_LOGICOP_NOR;
   case VK_LOGIC_OP_EQUIVALENT:
      return PIPE_LOGICOP_EQUIV;
   case VK_LOGIC_OP_INVERT:
      return PIPE_LOGICOP_INVERT;
   case VK_LOGIC_OP_OR_REVERSE:
      return PIPE_LOGICOP_OR_REVERSE;
   case VK_LOGIC_OP_COPY_INVERTED:
      return PIPE_LOGICOP_COPY_INVERTED;
   case VK_LOGIC_OP_OR_INVERTED:
      return PIPE_LOGICOP_OR_INVERTED;
   case VK_LOGIC_OP_NAND:
      return PIPE_LOGICOP_NAND;
   case VK_LOGIC_OP_SET:
      return PIPE_LOGICOP_SET;
   default:
      return PIPE_LOGICOP_COPY;
   }
}

static void
pack_blend(struct vc4_pipeline *pipeline,
           const VkPipelineColorBlendStateCreateInfo *cb)
{
   int i;

   struct pipe_blend_state *blend_state = &pipeline->blend_state;
   struct pipe_blend_color *blend_color = &pipeline->blend_color;
   struct vc4_dynamic_state *dynamic_state = &pipeline->dynamic_state;

   blend_state->independent_blend_enable = false;

   if (!cb)
      return;

   blend_state->logicop_enable = cb->logicOpEnable;
   blend_state->logicop_func = cb->logicOpEnable ? vk_conv_blend_logicOp(cb->logicOp) : PIPE_LOGICOP_COPY;

   if (cb->attachmentCount > 1)
      blend_state->independent_blend_enable = true;
   for (i = 0; i < cb->attachmentCount; i++) {
      blend_state->rt[i].colormask = cb->pAttachments[i].colorWriteMask;
      blend_state->rt[i].blend_enable = cb->pAttachments[i].blendEnable;
      blend_state->rt[i].rgb_func = vk_conv_blend_func(cb->pAttachments[i].colorBlendOp);
      blend_state->rt[i].rgb_src_factor = vk_conv_blend_factor(cb->pAttachments[i].srcColorBlendFactor);
      blend_state->rt[i].rgb_dst_factor = vk_conv_blend_factor(cb->pAttachments[i].dstColorBlendFactor);
      blend_state->rt[i].alpha_func = vk_conv_blend_func(cb->pAttachments[i].alphaBlendOp);
      blend_state->rt[i].alpha_src_factor = vk_conv_blend_factor(cb->pAttachments[i].srcAlphaBlendFactor);
      blend_state->rt[i].alpha_dst_factor = vk_conv_blend_factor(cb->pAttachments[i].dstAlphaBlendFactor);
   }

   blend_state->max_rt = i;

   if (!(dynamic_state->mask & VC4_DYNAMIC_BLEND_CONSTANTS)) {
      memcpy(blend_color->color, cb->blendConstants, 4 * sizeof(float));
   }
}

/* Translate a VkStencilOp into the matching gallium PIPE_STENCIL_OP_*
 * value.  Unknown ops trip the assert and yield -1.
 */
static enum pipe_stencil_op
vkStencilop_cov(VkStencilOp vk_op)
{
   switch (vk_op) {
   case VK_STENCIL_OP_KEEP:                return PIPE_STENCIL_OP_KEEP;
   case VK_STENCIL_OP_ZERO:                return PIPE_STENCIL_OP_ZERO;
   case VK_STENCIL_OP_REPLACE:             return PIPE_STENCIL_OP_REPLACE;
   case VK_STENCIL_OP_INCREMENT_AND_CLAMP: return PIPE_STENCIL_OP_INCR;
   case VK_STENCIL_OP_DECREMENT_AND_CLAMP: return PIPE_STENCIL_OP_DECR;
   case VK_STENCIL_OP_INVERT:              return PIPE_STENCIL_OP_INVERT;
   case VK_STENCIL_OP_INCREMENT_AND_WRAP:  return PIPE_STENCIL_OP_INCR_WRAP;
   case VK_STENCIL_OP_DECREMENT_AND_WRAP:  return PIPE_STENCIL_OP_DECR_WRAP;
   default:
      assert(0);
      return -1;
   }
}

/* Convert one face's VkStencilOpState into gallium pipe_stencil_state form
 * and mark it enabled.  The reference value is handled by the caller.
 */
static void
vk_stencil_cov(const VkStencilOpState *vk_ss, struct pipe_stencil_state *out)
{
   out->enabled = 1;

   /* The compare op is copied through without translation.
    * NOTE(review): this relies on VkCompareOp and the gallium compare-func
    * encoding agreeing numerically — confirm against the headers.
    */
   out->func = vk_ss->compareOp;

   out->valuemask = vk_ss->compareMask;
   out->writemask = vk_ss->writeMask;

   out->fail_op  = vkStencilop_cov(vk_ss->failOp);
   out->zfail_op = vkStencilop_cov(vk_ss->depthFailOp);
   out->zpass_op = vkStencilop_cov(vk_ss->passOp);
}

/*
 * Pack the Vulkan depth/stencil create info into the pipeline's gallium
 * pipe_depth_stencil_alpha_state and stencil reference values.
 *
 * ds_info may be NULL: pipeline_init passes NULL when rasterization is
 * disabled (the original dereferenced it unconditionally and would crash).
 */
static void
pack_stencil_cfg(struct vc4_pipeline *pipeline,
                 const VkPipelineDepthStencilStateCreateInfo *ds_info)
{
   struct pipe_depth_stencil_alpha_state *ds = &pipeline->ds_state;

   if (!ds_info)
      return;

   if (ds_info->stencilTestEnable) {
      /* stencil[0] = front face, stencil[1] = back face. */
      vk_stencil_cov(&ds_info->front, &ds->stencil[0]);
      pipeline->stencil_ref.ref_value[0] = ds_info->front.reference;
      vk_stencil_cov(&ds_info->back, &ds->stencil[1]);
      pipeline->stencil_ref.ref_value[1] = ds_info->back.reference;
   }

   if (ds_info->depthTestEnable) {
      ds->depth.enabled = 1;
      /* compareOp is passed through; see note in vk_stencil_cov(). */
      ds->depth.func = ds_info->depthCompareOp;
      ds->depth.writemask = ds_info->depthWriteEnable ? 1 : 0;
   }

   /* Depth-bounds test is not supported.  The original wrote
    * assert(~enable), but ~VK_TRUE is nonzero so that assert could never
    * fire; logical NOT is what was intended.
    */
   assert(!ds_info->depthBoundsTestEnable);
}

/*
 * Initialize a freshly-allocated vc4_pipeline from the create info:
 * record layout/render-pass linkage, pack blend and depth/stencil state,
 * compile the shaders, and capture the vertex-input layout.
 *
 * Returns VK_SUCCESS or the error from pipeline_compile_graphics(); on
 * failure the caller is expected to destroy the pipeline (no extra
 * allocations are made here that would need separate cleanup).
 */
static VkResult
pipeline_init(struct vc4_pipeline *pipeline,
              struct vc4_device *device,
              struct vc4_pipeline_cache *cache,
              const VkGraphicsPipelineCreateInfo *pCreateInfo,
              const VkAllocationCallbacks *pAllocator)
{
   VkResult result = VK_SUCCESS;

   pipeline->device = device;

   VC4_FROM_HANDLE(vc4_pipeline_layout, layout, pCreateInfo->layout);
   pipeline->layout = layout;

   VC4_FROM_HANDLE(vc4_render_pass, render_pass, pCreateInfo->renderPass);
   assert(pCreateInfo->subpass < render_pass->subpass_count);
   pipeline->pass = render_pass;
   pipeline->subpass = &render_pass->subpasses[pCreateInfo->subpass];

   /* If rasterization is not enabled, various CreateInfo structs must be
    * ignored.
    */
   const bool raster_enabled =
      !pCreateInfo->pRasterizationState->rasterizerDiscardEnable;

   const VkPipelineViewportStateCreateInfo *vp_info =
      raster_enabled ? pCreateInfo->pViewportState : NULL;

   const VkPipelineDepthStencilStateCreateInfo *ds_info =
      raster_enabled ? pCreateInfo->pDepthStencilState : NULL;

   const VkPipelineRasterizationStateCreateInfo *rs_info =
      raster_enabled ? pCreateInfo->pRasterizationState : NULL;

   const VkPipelineColorBlendStateCreateInfo *cb_info =
      raster_enabled ? pCreateInfo->pColorBlendState : NULL;

   const VkPipelineMultisampleStateCreateInfo *ms_info =
      raster_enabled ? pCreateInfo->pMultisampleState : NULL;

   pipeline_init_dynamic_state(pipeline,
                               pCreateInfo->pDynamicState,
                               vp_info, ds_info, cb_info, rs_info);

   /* pack_blend() tolerates a NULL cb_info.
    * NOTE(review): ds_info is NULL here when rasterization is disabled,
    * and pack_stencil_cfg() dereferences it unconditionally — verify it
    * guards against NULL (or add a guard there).
    */
   pack_blend(pipeline, cb_info);
   pack_stencil_cfg(pipeline, ds_info);

   if (raster_enabled) {
      pipeline->cullMode = rs_info->cullMode;
      pipeline->frontFace = rs_info->frontFace;
      pipeline->rasterizationSamples = ms_info->rasterizationSamples;
   }

   pipeline->primitive_restart = pCreateInfo->pInputAssemblyState->primitiveRestartEnable;
   pipeline->topology = pCreateInfo->pInputAssemblyState->topology;

   result = pipeline_compile_graphics(pipeline, cache, pCreateInfo, pAllocator);

   if (result != VK_SUCCESS) {
      /* Caller would already destroy the pipeline, and we didn't allocate any
       * extra info. We don't need to do anything else.
       */
      return result;
   }

   const VkPipelineVertexInputStateCreateInfo *vi_info =
      pCreateInfo->pVertexInputState;

   /* Record per-binding stride/rate, indexed by the app-chosen binding
    * slot (not by i).
    * NOTE(review): inputRate is stored directly as instance_divisor, i.e.
    * VERTEX(0) -> 0 and INSTANCE(1) -> divisor 1 — confirm that this is
    * the intended encoding for the vc4 backend.
    */
   pipeline->vb_count = vi_info->vertexBindingDescriptionCount;
   for (uint32_t i = 0; i < vi_info->vertexBindingDescriptionCount; i++) {
      const VkVertexInputBindingDescription *desc =
         &vi_info->pVertexBindingDescriptions[i];

      pipeline->vb[desc->binding].stride = desc->stride;
      pipeline->vb[desc->binding].instance_divisor = desc->inputRate;
   }

   for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {
      const VkVertexInputAttributeDescription *desc = &vi_info->pVertexAttributeDescriptions[i];
      uint32_t location = desc->location;

      /* location is unsigned; the comparison against -1 checks for the
       * all-ones "unused" sentinel (location != UINT32_MAX).
       */
      if (location != -1) {
         assert(location < MAX_VERTEX_ATTRIBS);
         pipeline->vtx.pipe[location].src_offset = desc->offset;
         pipeline->vtx.pipe[location].vertex_buffer_index = desc->binding;
         pipeline->vtx.pipe[location].src_format = vk_format_to_pipe_format(desc->format);

         pipeline->vtx.num_elements++;
      }
   }

   /* Sort the full element array (including unused slots) with the
    * project-defined comparator so active elements end up in the order the
    * backend expects.
    */
   qsort(pipeline->vtx.pipe, MAX_VERTEX_ATTRIBS, sizeof(struct pipe_vertex_element), pipe_vertex_compar);

   return result;
}

static void
vc4_destroy_pipeline(struct vc4_pipeline *pipeline,
                      struct vc4_device *device,
                      const VkAllocationCallbacks *pAllocator)
{
   if (!pipeline)
      return;

   vk_free2(&device->vk.alloc, pAllocator, pipeline);
}

/* Allocate and initialize a single graphics pipeline.  On failure the
 * partially-built pipeline is destroyed and the error is returned;
 * *pPipeline is written only on success.
 */
static VkResult
graphics_pipeline_create(VkDevice _device,
                         VkPipelineCache _cache,
                         const VkGraphicsPipelineCreateInfo *pCreateInfo,
                         const VkAllocationCallbacks *pAllocator,
                         VkPipeline *pPipeline)
{
   VC4_FROM_HANDLE(vc4_device, device, _device);
   VC4_FROM_HANDLE(vc4_pipeline_cache, cache, _cache);

   /* TODO: fall back to a device-default pipeline cache when the caller
    * passes VK_NULL_HANDLE (previously sketched here but disabled).
    */

   struct vc4_pipeline *pipeline =
      vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8,
                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!pipeline)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   VkResult result =
      pipeline_init(pipeline, device, cache, pCreateInfo, pAllocator);
   if (result != VK_SUCCESS) {
      vc4_destroy_pipeline(pipeline, device, pAllocator);
      return result;
   }

   *pPipeline = vc4_pipeline_to_handle(pipeline);
   return VK_SUCCESS;
}

/* vkCreateGraphicsPipelines entry point.  Each create info is attempted
 * independently: a failing entry gets VK_NULL_HANDLE and creation of the
 * remaining pipelines continues, with the last failure code reported.
 */
VkResult
vc4_CreateGraphicsPipelines(VkDevice _device,
                             VkPipelineCache pipelineCache,
                             uint32_t count,
                             const VkGraphicsPipelineCreateInfo *pCreateInfos,
                             const VkAllocationCallbacks *pAllocator,
                             VkPipeline *pPipelines)
{
   VkResult status = VK_SUCCESS;

   for (uint32_t i = 0; i < count; i++) {
      VkResult r = graphics_pipeline_create(_device, pipelineCache,
                                            &pCreateInfos[i], pAllocator,
                                            &pPipelines[i]);
      if (r == VK_SUCCESS)
         continue;

      pPipelines[i] = VK_NULL_HANDLE;
      status = r;
   }

   return status;
}

/* vkDestroyPipeline entry point; a VK_NULL_HANDLE pipeline is a no-op. */
void
vc4_DestroyPipeline(VkDevice _device,
                     VkPipeline _pipeline,
                     const VkAllocationCallbacks *pAllocator)
{
   VC4_FROM_HANDLE(vc4_pipeline, pipeline, _pipeline);
   VC4_FROM_HANDLE(vc4_device, device, _device);

   vc4_destroy_pipeline(pipeline, device, pAllocator);
}
